2019-12-20

¿POR QUÉ ESTOS DATOS?

Comentario ATP Tour oficial:





This is awesome! We would love to speak to you about creating content for the ATP's channels, drop us an email at socialmedia@.

¿QUÉ VAMOS A HACER?

OBJETIVO DEL PROYECTO

  1. Importar
  2. Ordenar / Transformar
  3. Visualizar
  4. Comunicar

IMPORTAR

# Download the two ATP World Tour CSV files from DataHub into the working
# directory, then load each one into a data frame.
#
# Each URL is assembled from single-line pieces with paste0(): a string
# literal that continues on the next source line embeds a newline character
# in the URL, which makes the request fail.
# NOTE(review): the resource path ends in "7.html" / "8.html" and the file
# name is appended directly after it -- confirm these are the intended
# download links for the CSV resources.
file_to_download <- "rankings_1973-2017.csv"
url <- paste0(
  "https://datahub.io/",
  "sports-data/atp-world-tour-tennis-data/r/7.html",
  file_to_download
)
download.file(url, destfile = file_to_download)

file_to_download <- "player_overviews_unindexed_csv.csv"
url <- paste0(
  "https://datahub.io/sports-data/",
  "atp-world-tour-tennis-data/r/8.html",
  file_to_download
)
download.file(url, destfile = file_to_download)

# import() is not defined in the visible code (presumably rio::import --
# TODO confirm).
# NOTE(review): the files are written relative to the working directory but
# read through here::here(); verify that both resolve to the same folder.
atp_database_1 <- import(here::here("./rankings_1973-2017.csv"))

atp_database_2 <- import(here::here("./player_overviews_unindexed_csv.csv"))

ORDENAR / TRANSFORMAR




ESTRUCTURA DE LOS DATOS

# Combine the weekly rankings with the player profiles. Rows are matched on
# `player_id`, and rows without a match in the other table are kept.
df <- atp_database_1 %>%
  full_join(atp_database_2, by = "player_id")
'data.frame':   2694539 obs. of  14 variables:
 $ week_title     : chr  "2017.11.20" "2017.11.20" "2017.11.20" "2017.11.20" ...
 $ week_year      : int  2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 ...
 $ week_month     : int  11 11 11 11 11 11 11 11 11 11 ...
 $ week_day       : int  20 20 20 20 20 20 20 20 20 20 ...
 $ rank_text      : chr  "1" "2" "3" "4" ...
 $ rank_number    : int  1 2 3 4 5 6 7 8 9 10 ...
 $ move_positions : int  NA NA 3 1 1 1 1 1 2 NA ...
 $ move_direction : chr  "" "" "up" "down" ...
 $ player_age     : int  31 36 26 20 24 29 26 25 32 26 ...
 $ ranking_points : int  10645 9605 5150 4610 4015 3805 3775 3165 3150 2615 ...
 $ tourneys_played: int  18 17 23 25 27 22 26 22 15 25 ...
 $ player_url     : chr  "/en/players/rafael-nadal/n409/overview"
"/en/players/roger-federer/f324/overview" "/en/players/grigor-dimitrov/d875/overview" ...
 $ player_slug    : chr  "rafael-nadal" "roger-federer" ...
 $ player_id      : chr  "n409" "f324" "d875" "z355" ...

LIMPIEZA DE DATOS

        # Drop the columns that are not used later, give the remaining key
        # columns shorter names, and normalise the text values:
        #   - date:   "." separators become "-"
        #   - player: "-" and "%20" become "_", and the long slug
        #             "juan_martin_del_potro" is shortened to "del_potro"
        # The replacements on `player` run in the order written, each one
        # seeing the result of the previous one.
        df1 <- df %>%
            select(-c(
                move_positions, move_direction,
                player_url.x, player_url.y, player_id, player_slug.y,
                residence, birthdate, birth_year, birth_month, birth_day,
                turned_pro, weight_lbs, height_ft, height_inches
            )) %>%
            rename(
                player = player_slug.x,
                date = week_title,
                month = week_month,
                year = week_year,
                country = flag_code
            ) %>%
            mutate(
                date = gsub(".", "-", date, fixed = TRUE),
                player = gsub("-", "_", player, fixed = TRUE),
                player = gsub("%20", "_", player, fixed = TRUE),
                player = gsub("juan_martin_del_potro", "del_potro",
                    player, fixed = TRUE)
            )

        # Standardise every column name to snake_case.
        df2 <- janitor::clean_names(df1, case = "snake")

PRESENTACIÓN DE VARIABLES




names(df2)
#>  [1] "date"            "year"            "month"           "week_day"       
#>  [5] "rank_text"       "rank_number"     "player_age"      "ranking_points" 
#>  [9] "tourneys_played" "player"          "first_name"      "last_name"      
#> [13] "country"         "birthplace"      "weight_kg"       "height_cm"      
#> [17] "handedness"      "backhand"

VISUALIZAR




EJEMPLOS



  • BUMP CHART

  • TABLAS

  • MAPAS

  • BAR CHART RACE

BUMP CHART

CONSTRUYENDO EL GRÁFICO

A DESTACAR:

# Weekly rankings of the top 15 players during 2010, sorted by date and
# returned as a plain data frame.
#
# No grouping is applied: the filter and the mutate are element-wise and
# arrange() sorts the whole table, so grouping by `date` or by
# `player, date` does not change the result. as.data.frame() is the last
# step because it already yields an ungrouped data frame.
df3 <- df2 %>%
  select(date, year, month, country, player, rank_number, ranking_points) %>%
  filter((year == 2010) & (rank_number < 16)) %>%
  arrange(date) %>%
  # 14612 is the numeric value of as.Date("2010-01-03"), so `day` is the
  # number of days elapsed since that date.
  mutate(day = as.numeric(as.Date(date) - 14612)) %>%
  as.data.frame()

(Help)

# Excerpt from the df3 pipeline (not runnable on its own): `date` is parsed
# as a Date, shifted back by 14612 days and converted to a plain number; the
# result is then turned into a base data frame.
mutate(day = as.numeric(as.Date(date) - 14612)) %>%
as.data.frame()

# Mark five named players. `flag` is TRUE for them and FALSE otherwise;
# `player_col` keeps their name and is "zzz" for everyone else (presumably so
# they can be highlighted in the chart -- confirm against the plotting code).
df4 <- df3 %>%
  mutate(
    flag = player %in% c(
      "roger_federer", "rafael_nadal", "novak_djokovic",
      "andy_murray", "del_potro"
    ),
    player_col = if_else(flag, player, "zzz")
  )

TABLAS

PORCENTAJE DIESTROS/ZURDOS TOP 100 AÑO 2017:

handedness NN percent
Left-Handed 16 0.16
Right-Handed 84 0.84


PORCENTAJE TIPO REVÉS TOP 100 AÑO 2017:

backhand NN percent
One-Handed Backhand 21 0.21
Two-Handed Backhand 79 0.79

ALTURA Y PESO MEDIO TOP 100 AÑOS 2013 - 2017:


date altura_media peso_medio
2013-12-30 187.21 80.99
2014-12-29 186.99 80.80
2015-12-28 186.99 80.58
2016-12-26 186.80 80.08
2017-11-20 187.08 80.26

EDAD AL CONVERTIRSE EN PROFESIONAL:


player birth_year turned_pro years_turned_pro
rafael_nadal 1986 2001 15
roger_federer 1981 1998 17
grigor_dimitrov 1991 2008 17
alexander_zverev 1997 2013 16
dominic_thiem 1993 2011 18



player birth_year turned_pro years_turned_pro
marin_cilic 1988 2005 17
david_goffin 1990 2009 19
jack_sock 1992 2011 19
stan_wawrinka 1985 2002 17
pablo_carreno_busta 1991 2009 18

MAPAS

CONSTRUYENDO EL MAPA

# Translate the country codes that differ between the ranking data and
# ISO 3166-1 alpha-3 so they can be matched against map data.
# "RSA" is South Africa, whose ISO alpha-3 code is "ZAF"; mapping it to "RUS"
# would place South African players in Russia.
# NOTE(review): only these five codes are translated; check whether other
# countries present in the data also need a mapping.
df3 <- df3 %>%
    mutate(country = gsub("SUI", "CHE", country, fixed = TRUE)) %>%
    mutate(country = gsub("BUL", "BGR", country, fixed = TRUE)) %>%
    mutate(country = gsub("GER", "DEU", country, fixed = TRUE)) %>%
    mutate(country = gsub("CRO", "HRV", country, fixed = TRUE)) %>%
    mutate(country = gsub("RSA", "ZAF", country, fixed = TRUE))


# For every player and year after 2013, keep only the row with the latest
# date: rows are sorted by date across the whole table, then the last row of
# each (player, year) group is taken.
df4 <- df3 %>%
  filter(year > 2013) %>%
  group_by(player, year) %>%
  arrange(date) %>%
  slice(n()) %>%
  ungroup()

BAR CHART RACE




¿LO HEMOS CONSEGUIDO?




A MEDIAS

FIN